import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Apply seaborn's default theme; color_codes=True also remaps matplotlib's
# shorthand colour codes ("b", "g", "r", ...) to the seaborn palette.
sns.set(color_codes=True)

# Load the cars dataset directly from GitHub (needs network access).
df = pd.read_csv(
    'https://raw.githubusercontent.com/Akhilvijaykumar/CARS.csv/main/CARS.csv'
)

# Box plot of the MSRP column to eyeball its spread and extreme values.
sns.boxplot(data=df, x='MSRP')
# Output: <AxesSubplot:xlabel='MSRP'>
# Scatter plot of MSRP (y axis) against Horsepower (x axis) on a 5x5 figure.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x=df['Horsepower'], y=df['MSRP'])

# Title and axis labels are set in one call on the Axes object.
ax.set(
    title='Scatter plot between MSRP and Horsepower',
    xlabel='Horsepower',
    ylabel='MSRP',
)
plt.show()
def find_outliers_IQR(df, factor: float = 1.5):
    """Return the entries of *df* that lie outside the IQR fences.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where
    ``Q1``/``Q3`` are the 25th/75th percentiles and ``IQR = Q3 - Q1``.
    Values exactly on a fence are not treated as outliers.

    Args:
        df: The data to screen. Despite the name, the caller in this file
            passes a single column (a pandas Series); the parameter name
            is kept for backward compatibility.
        factor: Multiplier applied to the IQR to position the fences.
            Defaults to 1.5, the value that was previously hard-coded.

    Returns:
        The subset of *df* selected by the boolean outlier mask, with the
        original index labels preserved.
    """
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1
    lower_fence = q1 - factor * iqr
    upper_fence = q3 + factor * iqr
    return df[(df < lower_fence) | (df > upper_fence)]
# Collect the MSRP values outside the IQR fences and print a short summary:
# how many there are and the largest/smallest of them.
outliers = find_outliers_IQR(df['MSRP'])
print(f'number of outliers: {len(outliers)}')
print(f'max outlier value: {outliers.max()}')
print(f'min outlier value: {outliers.min()}')
# Output:
#   number of outliers: 996
#   max outlier value: 2065902
#   min outlier value: 74100
import plotly.express as px
# Interactive plotly histogram of the MSRP column (no bin count is given,
# so plotly chooses the binning itself).
fig = px.histogram(data_frame=df, x='MSRP')
fig.show()
import pandas as pd
from matplotlib import pyplot as plt
# `url` was never defined before being passed to read_csv (NameError);
# bind it to the same CSV address that is loaded at the top of this file.
url = 'https://raw.githubusercontent.com/Akhilvijaykumar/CARS.csv/main/CARS.csv'

# Read the CSV into pandas (needs network access).
data = pd.read_csv(url)
data.head()

# read_csv already returns a DataFrame; this wrap is kept only so that
# `df` is rebound exactly as before.
df = pd.DataFrame(data)

# First 12 model names and their MSRP values; only the first 10 of each
# are plotted below.
name = df['Model'].head(12)
price = df['MSRP'].head(12)

# Figure size
fig = plt.figure(figsize=(10, 7))

# Vertical bar plot (plt.bar draws vertical bars): model name vs. MSRP.
plt.bar(name[:10], price[:10])

# Show plot
plt.show()
# Hard-coded example data: one numeric value per car brand, in matching order.
cars = ['AUDI', 'BMW', 'FORD', 'TESLA', 'JAGUAR', 'MERCEDES']
data = [23, 17, 35, 29, 12, 41]

# Draw the values as a pie chart on a 10x7 figure, one labelled wedge
# per brand.
fig = plt.figure(figsize=(10, 7))
plt.pie(x=data, labels=cars)

# Render the figure.
plt.show()